R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

rm(list = ls(all=TRUE)) # clear global environment
library(tidyr) # data loading
library(dplyr) # dataframe manipulation
library(stringr) # string manipulation
library(ggplot2) # plotting

# or simply library(tidyverse)

options(scipen = 999) # turn off scientific notation

path <- file.path("C:","Users","Paulina-laptop","Desktop","R_Programming","RLadies_workshop")

setwd(path)
sprintf("Current directory: %s", getwd())
## [1] "Current directory: C:/Users/Paulina-laptop/Desktop/R_Programming/RLadies_workshop"

Data loading & preparation

Let’s load some data first

wells <- read.csv(unzip("BCOGC_data/prod_csv.zip", "zone_prd_2016_to_present.csv"), skip = 1)

head(wells)
##   Wa_num Compltn_event_seq Prod_period              UWI Area_code Formtn_code
## 1     29                 0      201601 100142108318W600      3600        6200
## 2     29                 0      201603 100142108318W600      3600        6200
## 3     29                 0      201611 100142108318W600      3600        6200
## 4     29                 0      201612 100142108318W600      3600        6200
## 5     29                 0      201701 100142108318W600      3600        6200
## 6     29                 0      201702 100142108318W600      3600        6200
##   Pool_seq Gas_prod_vol..e3m3. Oil_prod_vol..m3. Water_prod_vol..m3.
## 1        A               107.7                 0                 2.3
## 2        A                45.2                 0                 1.7
## 3        A               166.2                 0                 5.0
## 4        A               159.2                 0                 2.6
## 5        A               133.5                 0                 3.7
## 6        A                95.8                 0                 4.1
##   Cond_prod_vol..m3. Prod_days. Gas_prod_cum..e3m3. Oil_prod_cum..m3.
## 1                0.5       31.0               107.7                 0
## 2                0.3       13.0               152.9                 0
## 3                1.3       22.0               319.1                 0
## 4                0.5       31.0               478.3                 0
## 5                0.6       31.0               611.8                 0
## 6                0.5       27.8               707.6                 0
##   Water_prod_cum..m3. Cond_prod_cum..m3. Project_code
## 1                 2.3                0.5            0
## 2                 4.0                0.8            0
## 3                 9.0                2.1            0
## 4                11.6                2.6            0
## 5                15.3                3.2            0
## 6                19.4                3.7            0

Column names in the original file have inconvenient interpunction, which can be easily fixed:

names(wells)
##  [1] "Wa_num"              "Compltn_event_seq"   "Prod_period"        
##  [4] "UWI"                 "Area_code"           "Formtn_code"        
##  [7] "Pool_seq"            "Gas_prod_vol..e3m3." "Oil_prod_vol..m3."  
## [10] "Water_prod_vol..m3." "Cond_prod_vol..m3."  "Prod_days."         
## [13] "Gas_prod_cum..e3m3." "Oil_prod_cum..m3."   "Water_prod_cum..m3."
## [16] "Cond_prod_cum..m3."  "Project_code"
names(wells) <- str_replace_all(names(wells), c(" " = "" , 
                                                "," = "",
                                                "\\("="_",
                                                "\\)"="",
                                                "\\.."="_",
                                                "\\."=""
                                                ))
names(wells)
##  [1] "Wa_num"            "Compltn_event_seq" "Prod_period"      
##  [4] "UWI"               "Area_code"         "Formtn_code"      
##  [7] "Pool_seq"          "Gas_prod_vol_e3m3" "Oil_prod_vol_m3"  
## [10] "Water_prod_vol_m3" "Cond_prod_vol_m3"  "Prod_days"        
## [13] "Gas_prod_cum_e3m3" "Oil_prod_cum_m3"   "Water_prod_cum_m3"
## [16] "Cond_prod_cum_m3"  "Project_code"
wells <- subset(wells, select=c(Wa_num, Prod_period, Prod_days, Area_code, Formtn_code, Gas_prod_vol_e3m3, Oil_prod_vol_m3, Water_prod_vol_m3, Cond_prod_vol_m3)) # select only useful columns

area_codes <- read.table("BCOGC_data/ogc_area_codes.csv", 
                         sep = ",", skip = 1, header = TRUE, stringsAsFactors = FALSE)
head(area_codes, 3)
##   Area.Code    Area.Name
## 1        50       ADSETT
## 2       100      AIRPORT
## 3       200 AITKEN CREEK
# rename so that we have the same name of Area_code and Area_name in all columns 
area_codes <- rename(area_codes, Area_code = Area.Code, Area_name = Area.Name)

formation_codes <- read.table("BCOGC_data/ogc_formation_codes.csv", 
                              sep = ",", skip = 1, header = TRUE, stringsAsFactors = FALSE)
head(formation_codes, 3)
##   Formation.Code        Formation.Name
## 1           4610 A MARKER/BASE OF LIME
## 2           2703          AITKEN CREEK
## 3           9922              ALDRIDGE
formation_codes <- rename(formation_codes, Formtn_code = Formation.Code, Formtn_name = Formation.Name)

head(area_codes,3)
##   Area_code    Area_name
## 1        50       ADSETT
## 2       100      AIRPORT
## 3       200 AITKEN CREEK
head(formation_codes,3)
##   Formtn_code           Formtn_name
## 1        4610 A MARKER/BASE OF LIME
## 2        2703          AITKEN CREEK
## 3        9922              ALDRIDGE

Merge dataframes to create 2 additional columns with area and name as the wells file contains only its numerical codes.

wells <- merge(wells, area_codes, by = 'Area_code')
wells <- merge(wells, formation_codes, by = 'Formtn_code')

# we don't need them anymore in memory and dataframe
rm(area_codes, formation_codes) # remove form memory
wells <- subset(wells, select = -c(Area_code, Formtn_code)) # remove from dataframe

First glimpse on the data. Notice newly created columns, Area_name and Formtn_name at the end.

names(wells)
## [1] "Wa_num"            "Prod_period"       "Prod_days"        
## [4] "Gas_prod_vol_e3m3" "Oil_prod_vol_m3"   "Water_prod_vol_m3"
## [7] "Cond_prod_vol_m3"  "Area_name"         "Formtn_name"
str(wells)
## 'data.frame':    517518 obs. of  9 variables:
##  $ Wa_num           : int  14893 25370 25370 26962 14893 14893 25371 25371 25371 26962 ...
##  $ Prod_period      : int  201809 202003 201901 201709 201805 202001 202011 201912 201805 201907 ...
##  $ Prod_days        : num  30 30.7 30.8 23 31 31 28.5 30.4 30.4 28 ...
##  $ Gas_prod_vol_e3m3: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Oil_prod_vol_m3  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Water_prod_vol_m3: num  91.4 1291 2528 1068 6948.7 ...
##  $ Cond_prod_vol_m3 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Area_name        : chr  "DESAN" "PEEJAY WEST" "PEEJAY WEST" "BEAVERTAIL" ...
##  $ Formtn_name      : chr  "QUATERNARY" "QUATERNARY" "QUATERNARY" "QUATERNARY" ...
summary(wells)
##      Wa_num       Prod_period       Prod_days     Gas_prod_vol_e3m3 
##  Min.   :   29   Min.   :201601   Min.   : 0.00   Min.   :     0.0  
##  1st Qu.:15183   1st Qu.:201704   1st Qu.:25.60   1st Qu.:    38.2  
##  Median :22328   Median :201807   Median :30.00   Median :   124.8  
##  Mean   :21074   Mean   :201810   Mean   :26.08   Mean   :   555.4  
##  3rd Qu.:28257   3rd Qu.:201910   3rd Qu.:31.00   3rd Qu.:   608.5  
##  Max.   :41055   Max.   :202101   Max.   :31.10   Max.   :119042.3  
##  Oil_prod_vol_m3   Water_prod_vol_m3 Cond_prod_vol_m3   Area_name        
##  Min.   :   0.00   Min.   :    0.0   Min.   :   0.00   Length:517518     
##  1st Qu.:   0.00   1st Qu.:    0.9   1st Qu.:   0.00   Class :character  
##  Median :   0.00   Median :    5.0   Median :   0.00   Mode  :character  
##  Mean   :  10.77   Mean   :  181.4   Mean   :  19.31                     
##  3rd Qu.:   0.00   3rd Qu.:   52.6   3rd Qu.:   1.00                     
##  Max.   :7089.30   Max.   :29177.7   Max.   :7424.00                     
##  Formtn_name       
##  Length:517518     
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
# Let's check number of formations in the file...
length(unique(wells$Formtn_name)) 
## [1] 92
# ...and list them
unique(wells$Formtn_name)
##  [1] "QUATERNARY"                   "CRETACEOUS"                  
##  [3] "CARDIUM SAND"                 "DOE CREEK"                   
##  [5] "DUNVEGAN"                     "BASE OF FISH SCALES"         
##  [7] "SCATTER"                      "PADDY"                       
##  [9] "PADDY- CADOTTE"               "CADOTTE"                     
## [11] "SPIRIT RIVER"                 "NOTIKEWIN"                   
## [13] "FALHER"                       "FALHER A"                    
## [15] "FALHER B"                     "FALHER C"                    
## [17] "FALHER D"                     "WILRICH"                     
## [19] "BLUESKY"                      "BASAL BLUESKY"               
## [21] "BLUESKY-GETHING"              "BLUESKY-GETHING-DETRITAL"    
## [23] "DETRITAL"                     "GETHING"                     
## [25] "LOWER GETHING"                "BASAL GETHING"               
## [27] "GETHING-BALDONNEL"            "CADOMIN"                     
## [29] "CHINKEH"                      "NIKANASSIN"                  
## [31] "DUNLEVY"                      "LOWER DUNLEVY"               
## [33] "ROCK CREEK"                   "NORDEGG"                     
## [35] "NORDEGG-BALDONNEL"            "PARDONET-BALDONNEL"          
## [37] "BALDONNEL"                    "BALDONNEL/UPPER CHARLIE LAKE"
## [39] "CHARLIE LAKE"                 "SIPHON"                      
## [41] "CECIL"                        "NANCY"                       
## [43] "FLATROCK"                     "BOUNDARY LAKE"               
## [45] "YELLOW MARKER"                "COPLIN"                      
## [47] "MICA"                         "BLUEBERRY"                   
## [49] "INGA"                         "NORTH PINE"                  
## [51] "BEAR FLAT"                    "PINGEL"                      
## [53] "LIMESTONE A BED"              "A MARKER/BASE OF LIME"       
## [55] "LOWER CHARLIE LAKE SANDS"     "ARTEX"                       
## [57] "ARTEX/HALFWAY"                "UPPER HALFWAY"               
## [59] "HALFWAY"                      "LOWER HALFWAY"               
## [61] "DOIG"                         "DOIG PHOSPHATE BEDS"         
## [63] "BLUESKY-GETHING-MONTNEY"      "LOWER CHARLIE LAKE/MONTNEY"  
## [65] "DOIG PHOSPHATE-MONTNEY"       "MONTNEY"                     
## [67] "BELLOY"                       "BELCOURT"                    
## [69] "BELCOURT-TAYLOR FLAT"         "BELLOY-KISKATINAW"           
## [71] "TAYLOR FLAT"                  "MISSISSIPPIAN"               
## [73] "KISKATINAW"                   "LOWER KISKATINAW"            
## [75] "BASAL KISKATINAW"             "UPPER DEBOLT"                
## [77] "DEBOLT"                       "ELKTON"                      
## [79] "SHUNDA"                       "PEKISKO"                     
## [81] "BANFF"                        "BESA RIVER"                  
## [83] "WABAMUN"                      "TETCHO"                      
## [85] "KAKISA"                       "JEAN MARIE"                  
## [87] "MUSKWA"                       "MUSKWA-OTTER PARK"           
## [89] "SLAVE POINT"                  "SULPHUR POINT"               
## [91] "EVIE"                         "PINE POINT"

Key dplyr functions:

filter - filter rows by values

slave_pnt <- filter(wells, Formtn_name == "SLAVE POINT" )
unique(slave_pnt$Formtn_name)
## [1] "SLAVE POINT"
muskwa <- filter(wells, Formtn_name %in% c("MUSKWA", "MUSKWA-OTTER PARK"))
unique(muskwa$Formtn_name)
## [1] "MUSKWA"            "MUSKWA-OTTER PARK"
# same as
muskwa <- filter(wells, Formtn_name == "MUSKWA" | Formtn_name == "MUSKWA-OTTER PARK")
unique(muskwa$Formtn_name)
## [1] "MUSKWA"            "MUSKWA-OTTER PARK"

arrange - sort dataframe

Wa_num_sorted <- arrange(wells, Wa_num) # ascending
head(Wa_num_sorted)
##   Wa_num Prod_period Prod_days Gas_prod_vol_e3m3 Oil_prod_vol_m3
## 1     29      201703      30.0              73.0               0
## 2     29      201704      24.0              64.7               0
## 3     29      201706       3.0               7.2               0
## 4     29      201707      15.1              53.8               0
## 5     29      201611      22.0             166.2               0
## 6     29      201601      31.0             107.7               0
##   Water_prod_vol_m3 Cond_prod_vol_m3    Area_name Formtn_name
## 1               2.8              0.3 FORT ST JOHN      BELLOY
## 2               1.9              0.3 FORT ST JOHN      BELLOY
## 3               0.3              0.1 FORT ST JOHN      BELLOY
## 4               2.0              0.5 FORT ST JOHN      BELLOY
## 5               5.0              1.3 FORT ST JOHN      BELLOY
## 6               2.3              0.5 FORT ST JOHN      BELLOY
Wa_num_sorted <- arrange(wells, desc(Wa_num)) # descending
head(Wa_num_sorted)
##   Wa_num Prod_period Prod_days Gas_prod_vol_e3m3 Oil_prod_vol_m3
## 1  41055      202012      30.6           15051.0               0
## 2  41055      202101      30.8           12528.5               0
## 3  41055      202011       5.5            1293.3               0
## 4  41054      202011       5.6            1658.9               0
## 5  41054      202012      30.8           19143.3               0
## 6  41054      202101      30.8           17839.7               0
##   Water_prod_vol_m3 Cond_prod_vol_m3 Area_name Formtn_name
## 1            3630.5            950.8  HERITAGE     MONTNEY
## 2            2087.3            770.4  HERITAGE     MONTNEY
## 3            3396.8            110.8  HERITAGE     MONTNEY
## 4            3444.7            317.9  HERITAGE     MONTNEY
## 5            4204.7           1691.9  HERITAGE     MONTNEY
## 6            2476.8           1170.2  HERITAGE     MONTNEY
Wa_num_sorted <- arrange(wells, desc(Wa_num), Prod_period) # by multiple conditions
head(Wa_num_sorted)
##   Wa_num Prod_period Prod_days Gas_prod_vol_e3m3 Oil_prod_vol_m3
## 1  41055      202011       5.5            1293.3               0
## 2  41055      202012      30.6           15051.0               0
## 3  41055      202101      30.8           12528.5               0
## 4  41054      202011       5.6            1658.9               0
## 5  41054      202012      30.8           19143.3               0
## 6  41054      202101      30.8           17839.7               0
##   Water_prod_vol_m3 Cond_prod_vol_m3 Area_name Formtn_name
## 1            3396.8            110.8  HERITAGE     MONTNEY
## 2            3630.5            950.8  HERITAGE     MONTNEY
## 3            2087.3            770.4  HERITAGE     MONTNEY
## 4            3444.7            317.9  HERITAGE     MONTNEY
## 5            4204.7           1691.9  HERITAGE     MONTNEY
## 6            2476.8           1170.2  HERITAGE     MONTNEY
rm(Wa_num_sorted)

select - select dataframe by columns

names(wells)
## [1] "Wa_num"            "Prod_period"       "Prod_days"        
## [4] "Gas_prod_vol_e3m3" "Oil_prod_vol_m3"   "Water_prod_vol_m3"
## [7] "Cond_prod_vol_m3"  "Area_name"         "Formtn_name"
head(select(wells, Area_name, Formtn_name))
##     Area_name Formtn_name
## 1       DESAN  QUATERNARY
## 2 PEEJAY WEST  QUATERNARY
## 3 PEEJAY WEST  QUATERNARY
## 4  BEAVERTAIL  QUATERNARY
## 5       DESAN  QUATERNARY
## 6       DESAN  QUATERNARY
# instead of listing columns separately, we can list multiple columns next to each other using ":"
head(select(wells, Gas_prod_vol_e3m3:Cond_prod_vol_m3, Area_name, Formtn_name))
##   Gas_prod_vol_e3m3 Oil_prod_vol_m3 Water_prod_vol_m3 Cond_prod_vol_m3
## 1                 0               0              91.4                0
## 2                 0               0            1291.0                0
## 3                 0               0            2528.0                0
## 4                 0               0            1068.0                0
## 5                 0               0            6948.7                0
## 6                 0               0             109.3                0
##     Area_name Formtn_name
## 1       DESAN  QUATERNARY
## 2 PEEJAY WEST  QUATERNARY
## 3 PEEJAY WEST  QUATERNARY
## 4  BEAVERTAIL  QUATERNARY
## 5       DESAN  QUATERNARY
## 6       DESAN  QUATERNARY

mutate - create new columns

The same wells have been listed multiple times as there are various production periods each year. Let’s create new column Prod_year to make more general summary.

head(wells$Prod_period)
## [1] 201809 202003 201901 201709 201805 202001
wells <- wells %>%
  mutate(Prod_year = substr(Prod_period, 1, 4), # first 4 characters
         Prod_month = substr(Prod_period, 5, 6), # 5th and 6th character
         Prod_ym = paste(Prod_year, Prod_month, sep="-")) #to create "time points" for plotting

head(wells)
##   Wa_num Prod_period Prod_days Gas_prod_vol_e3m3 Oil_prod_vol_m3
## 1  14893      201809      30.0                 0               0
## 2  25370      202003      30.7                 0               0
## 3  25370      201901      30.8                 0               0
## 4  26962      201709      23.0                 0               0
## 5  14893      201805      31.0                 0               0
## 6  14893      202001      31.0                 0               0
##   Water_prod_vol_m3 Cond_prod_vol_m3   Area_name Formtn_name Prod_year
## 1              91.4                0       DESAN  QUATERNARY      2018
## 2            1291.0                0 PEEJAY WEST  QUATERNARY      2020
## 3            2528.0                0 PEEJAY WEST  QUATERNARY      2019
## 4            1068.0                0  BEAVERTAIL  QUATERNARY      2017
## 5            6948.7                0       DESAN  QUATERNARY      2018
## 6             109.3                0       DESAN  QUATERNARY      2020
##   Prod_month Prod_ym
## 1         09 2018-09
## 2         03 2020-03
## 3         01 2019-01
## 4         09 2017-09
## 5         05 2018-05
## 6         01 2020-01
class(wells$Prod_ym)
## [1] "character"

summarize - summary within the group

head(wells$Prod_period)
## [1] 201809 202003 201901 201709 201805 202001
prod_days_by_area <- wells %>%
  group_by(Area_name) %>%
  summarize(
    total_prod_days = sum(Prod_days)
    )

head(prod_days_by_area)
## # A tibble: 6 x 2
##   Area_name          total_prod_days
##   <chr>                        <dbl>
## 1 ADSETT                       6385.
## 2 AIRPORT                       141.
## 3 AITKEN CREEK                 4164.
## 4 AITKEN CREEK NORTH           1885.
## 5 ALTARES                     14861.
## 6 BEAVERDAM                    6439.
prod_days_by_area %>% filter(total_prod_days > 500000) %>% 
  ggplot(aes(x = Area_name, y = total_prod_days)) + 
  geom_col() + 
  ggtitle("Total Production Days for most productive areas", ) +
  ylab("Production days") + 
  xlab("Area")

histograms

Histograms are commonly used to visualize the distribution of our data. The bars of histogram correspond to the number of observations within specific bin. Note: it’s recommended to check different bin sizes to find the most optimal bin for the data.

wells %>% filter(Cond_prod_vol_m3 > 0) %>% 
  ggplot() + 
  geom_histogram(mapping = aes(Cond_prod_vol_m3))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# let's limit our data to the wells which had production (100-1000). Here, we use default number of bins (30)
wells %>% filter(Cond_prod_vol_m3 > 100, Cond_prod_vol_m3 < 1000) %>% 
  ggplot() + 
  geom_histogram(mapping = aes(Cond_prod_vol_m3))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# increase number of bins to 200 to see the difference
wells %>% filter(Cond_prod_vol_m3 > 100, Cond_prod_vol_m3 < 1000) %>% 
  ggplot() + 
  geom_histogram(mapping = aes(Cond_prod_vol_m3), bins=200) + 
  ggtitle("Distribution of condensate production per production period") + 
  xlab(expression("Condensate production per production period m"^3))

tables

Tables are a nice way to present numerical data and identify the trends. Let’s analyze the water production in different formations.

# Using tables we can see how many wells we have for each formation
wells %>% group_by(Formtn_name) %>%
  summarize(
    count_wells = n_distinct(Wa_num),#count unique well numbers (multiple prod. periods)
    mean_wtr_prod = mean(Water_prod_vol_m3)
  ) %>% arrange(desc(count_wells))
## # A tibble: 92 x 3
##    Formtn_name             count_wells mean_wtr_prod
##    <chr>                         <int>         <dbl>
##  1 MONTNEY                        4745        150.  
##  2 JEAN MARIE                     1531          3.21
##  3 HALFWAY                         659         62.2 
##  4 BLUESKY                         589       2038.  
##  5 CADOMIN                         535         10.7 
##  6 NOTIKEWIN                       406          5.03
##  7 BALDONNEL                       356         62.3 
##  8 BLUESKY-GETHING-MONTNEY         354          1.34
##  9 GETHING                         330          2.33
## 10 BOUNDARY LAKE                   278        483.  
## # ... with 82 more rows
# we can find which areas / formations are suspected to have more water disposal wells than HC production wells
wells %>% group_by(Area_name) %>%
  summarize(
    count_wells = n_distinct(Wa_num), 
    mean_wtr_prod = mean(Water_prod_vol_m3) 
  ) %>% arrange(desc(mean_wtr_prod))
## # A tibble: 169 x 3
##    Area_name   count_wells mean_wtr_prod
##    <chr>             <int>         <dbl>
##  1 HAY RIVER           211         4481.
##  2 KLUA                  1         4328.
##  3 SUNRISE              14         2990.
##  4 TOWER LAKE            7         2458.
##  5 CLARKE LAKE          38         2145.
##  6 LAPP                  9         1699.
##  7 GOPHER                1         1527.
##  8 WOODRUSH              8          867.
##  9 TWO RIVERS            9          708.
## 10 LIARD                 6          700.
## # ... with 159 more rows

scatterplots

Scatterplots are useful when we want to show the relationship between 2 variables.

We can convert our dataframe to the long format to have all types of hydrocarbon production in one column using gather function. Picture below visualizes this:

# let's look again on the selected columns of original dataframe
wells %>% select(Water_prod_vol_m3, Gas_prod_vol_e3m3, Oil_prod_vol_m3, Cond_prod_vol_m3) %>% 
  head(3)
##   Water_prod_vol_m3 Gas_prod_vol_e3m3 Oil_prod_vol_m3 Cond_prod_vol_m3
## 1              91.4                 0               0                0
## 2            1291.0                 0               0                0
## 3            2528.0                 0               0                0
# we will use gather function to create new column "Prod_type" which will contain the "Prod_volume" value for gas, oil and condensate production. We assign the results to new dataframe.
prod_df <- wells %>% 
  gather(key = "Prod_type",
         value = "Prod_volume",
         c(Gas_prod_vol_e3m3, Oil_prod_vol_m3, Cond_prod_vol_m3)) %>% 
  select(Water_prod_vol_m3, Prod_volume, Prod_year, Prod_ym, Prod_type, Prod_days, Formtn_name)

prod_df %>% select(Water_prod_vol_m3, Prod_type) %>% head(3)
##   Water_prod_vol_m3         Prod_type
## 1              91.4 Gas_prod_vol_e3m3
## 2            1291.0 Gas_prod_vol_e3m3
## 3            2528.0 Gas_prod_vol_e3m3
# indeed, we have production type in Prod_type column!
unique(prod_df$Prod_type)
## [1] "Gas_prod_vol_e3m3" "Oil_prod_vol_m3"   "Cond_prod_vol_m3"
# let's visualize that quickly
prod_df %>% 
  filter(Prod_year == 2021) %>% 
  ggplot(aes(x = Water_prod_vol_m3, y = Prod_volume, colour = Prod_type)) +
  geom_point()

# add some axis limits
prod_df %>% 
  filter(Prod_year == 2021) %>% 
  ggplot(aes(x = Water_prod_vol_m3, y = Prod_volume, colour = Prod_type)) +
  geom_point() + 
  xlim(0,10000) + 
  ylim(0,25000)
## Warning: Removed 45 rows containing missing values (geom_point).

# visualize each type of production separately
prod_df %>% 
  filter(Prod_year == 2021) %>% 
  ggplot(aes(x = Water_prod_vol_m3, y = Prod_volume, colour = Prod_type)) +
  geom_point() + 
  facet_wrap(~Prod_type)

# create indepentent y axis for each chart, since the production is in different units.
prod_df %>% 
  filter(Prod_year == 2021) %>% 
  ggplot(aes(x = Water_prod_vol_m3, y = Prod_volume, colour = Prod_type, alpha = 0.3)) +
  geom_point() + 
  facet_wrap(~Prod_type, scales = "free") # scales free => not fixed limits of yaxis

# make grid using 2 columns
prod_df %>% 
  filter(Prod_year == 2021, Formtn_name %in% c("MONTNEY","BLUESKY")) %>% 
  ggplot(aes(x = Water_prod_vol_m3, y = Prod_volume, colour = Prod_type, alpha = 0.3)) +
  geom_point() + 
  facet_wrap(Formtn_name~Prod_type, scales = "free") # scales free => not fixed limits of yaxis

# same but with facet_grid (notice we put alpha outside aes() for comparison) scales='free" doesn't work for facet_grid.
prod_df %>% 
  filter(Prod_year == 2021, Formtn_name %in% c("MONTNEY","BLUESKY")) %>% 
  ggplot(aes(x = Water_prod_vol_m3, y = Prod_volume, colour = Prod_type), alpha = 0.3) +
  geom_point() + 
  facet_grid(Formtn_name~Prod_type, scales = "free") # scales free => not fixed limits of yaxis

# I expect that Prod_volume = 0 correspond to Saltwater Disposal wells, so let's remove those from the plot.
prod_df %>% 
  filter(Prod_year == 2021, Prod_volume > 0) %>% # Prod_volume > 0 to remove disposal wells 
  ggplot(aes(x = Water_prod_vol_m3, y = Prod_volume, colour = Prod_type, alpha = 0.3)) +
  geom_point() + 
  ggtitle("Water production per HC type") +
  facet_wrap(~Prod_type, scales = "free") +
  xlim(0,5000) 
## Warning: Removed 126 rows containing missing values (geom_point).

bar charts

Bar charts can be created in using two geometries: * geom_col - by default height of the bars represent value in the data * geom_bar - by default counts number of cases in each group

### Let's start with geom_col to visualize the production per period...
wells %>% subset(Prod_year > 2018) %>% 
  group_by(Prod_ym) %>% 
  summarise(
    prod_per_period = sum(Cond_prod_vol_m3)
  ) %>% 
  ggplot(aes(x = Prod_ym, y = prod_per_period)) + 
  theme(axis.text.x = element_text(angle = 90)) + 
  geom_col()

### ...and per year. Which one is more useful / interesting?
wells %>% 
  subset(Prod_year > 2018) %>%
  group_by(Prod_year) %>% 
  summarise(
    prod_per_year = sum(Cond_prod_vol_m3)
  ) %>% 
  ggplot(aes(x = Prod_year, y = prod_per_year)) + 
  geom_col()

We can apply simple trick to order the formations from the highest production to the lowest (using mutate function).

# first let's create dataframe with summed produced values by formation
prod_by_formation <- wells %>% 
  group_by(Formtn_name) %>%
  summarise(
    oil_prod_total = sum(Oil_prod_vol_m3),
    gas_prod_total = sum(Gas_prod_vol_e3m3),
    cond_prod_total = sum(Cond_prod_vol_m3), 
    water_prod_total = sum(Water_prod_vol_m3)
    ) %>%
  ungroup()

# find 5 most productive gas formation
most_gas <- prod_by_formation %>% 
  arrange(desc(gas_prod_total)) %>% 
  head(5) 

ggplot(most_gas, aes(x = Formtn_name, y = gas_prod_total)) +
  geom_col(fill='#ff6347') # change color to red

# we can use a trick to order the bars according to production value (using mutate function)
most_gas <- most_gas %>%  
  mutate(Formtn_name=factor(Formtn_name, levels=Formtn_name))

ggplot(most_gas, aes(x = Formtn_name, y = gas_prod_total)) +
  geom_col(fill='#ff6347') # change color to red

# same for oil production
most_oil <- prod_by_formation %>% 
  arrange(desc(oil_prod_total)) %>% 
  head(5) %>% 
  mutate(Formtn_name=factor(Formtn_name, levels=Formtn_name))

ggplot(most_oil, aes(x = Formtn_name, y = oil_prod_total)) +
  geom_col()

# and for condensate production
most_cond <- prod_by_formation %>% 
  arrange(desc(cond_prod_total)) %>% 
  head(5) %>% 
  mutate(Formtn_name=factor(Formtn_name, levels=Formtn_name))

ggplot(most_gas, aes(x = Formtn_name, y = gas_prod_total)) +
  geom_col(fill="#6495ed")

We can also visualize the same set of formations. To define the set, we will take 5 most productive gas, oil and condensate formations.

# define 5 most productive formations
best_formations <- unique(most_oil$Formtn_name,
                            most_gas$Formtn_name,
                            most_cond$Formtn_name)

# plot oil production
wells %>% 
  subset(Formtn_name %in% best_formations) %>%
  ggplot(aes(x = Formtn_name, y = Oil_prod_vol_m3, fill=Formtn_name)) +
    geom_col() +
  ggtitle("Total oil production by formation") +
  xlab("Formation") +
  ylab("Oil production [m3]")

# plot gas production
prod_by_formation %>% 
  subset(Formtn_name %in% best_formations) %>%
  ggplot(aes(x = Formtn_name, y = gas_prod_total, fill=Formtn_name)) +
    geom_col() + 
  ggtitle("Total gas production by formation") + 
  xlab("Formation") + 
  ylab("Gas production [e3m3]")

# plot condensate production
prod_by_formation %>% 
  subset(Formtn_name %in% best_formations) %>%
  ggplot(aes(x = Formtn_name, y = cond_prod_total, fill=Formtn_name)) +
    geom_col() + 
  ggtitle("Total condensate production by formation") + 
  xlab("Formation") + 
  ylab("Condensate production [m3]")

# plot water production
prod_by_formation %>% 
  subset(Formtn_name %in% best_formations) %>%
  ggplot(aes(x = Formtn_name, y = water_prod_total, fill=Formtn_name)) +
    geom_col() + 
  ggtitle("Total water production by formation") + 
  xlab("Formation") + 
  ylab("Water production [m3]")

line graphs

Line graphs are useful to visualize the change in time. We can visualize the trends in production for example.

# reminder: available formation names in our dataframe 
unique(wells$Formtn_name)[1:20]
##  [1] "QUATERNARY"          "CRETACEOUS"          "CARDIUM SAND"       
##  [4] "DOE CREEK"           "DUNVEGAN"            "BASE OF FISH SCALES"
##  [7] "SCATTER"             "PADDY"               "PADDY- CADOTTE"     
## [10] "CADOTTE"             "SPIRIT RIVER"        "NOTIKEWIN"          
## [13] "FALHER"              "FALHER A"            "FALHER B"           
## [16] "FALHER C"            "FALHER D"            "WILRICH"            
## [19] "BLUESKY"             "BASAL BLUESKY"
# let's visualize only one formation first
formtnName = "BLUESKY"
prod_df %>%
  filter(Formtn_name == formtnName) %>% 
  group_by(Prod_ym, Formtn_name, Prod_type) %>% 
  summarise(
    total_prod = sum(Prod_volume)
    ) %>% 
  ungroup() %>% 
  ggplot(aes(x = Prod_ym, y = total_prod, group=Formtn_name, col=Prod_type)) +
  geom_line() + 
  scale_x_discrete(breaks = c("2016-01", "2017-01","2018-01",
                              "2019-01","2020-01", "2021-01")) +
  facet_grid(Prod_type ~ ., scales = "free") + 
  ggtitle(paste("Production per year for formation", formtnName))
## `summarise()` has grouped output by 'Prod_ym', 'Formtn_name'. You can override using the `.groups` argument.

#  and now production from all formations in one chart
prod_df %>%
  group_by(Prod_ym, Prod_type) %>% 
  summarise(
    total_prod = sum(Prod_volume)
    ) %>% 
  ungroup() %>% 
  ggplot(aes(x = Prod_ym, y = total_prod, group=1, col=Prod_type)) +
  geom_line() + 
  scale_x_discrete(breaks = c("2016-01", "2017-01","2018-01", 
                              "2019-01","2020-01", "2021-01")) +
  facet_grid(Prod_type ~ ., scales = "free") +
  ggtitle("Total production per year")
## `summarise()` has grouped output by 'Prod_ym'. You can override using the `.groups` argument.

FACETS - subplots according to some variable

let’s see how facets can increase the visibility of our charts (and see the example of geom_bar as well)

# here we plot the count of wells per year for all formations
ggplot(data = wells, aes(x = Prod_year)) +
  geom_bar()

# filter wells for most productive formations and most recent wells. Add some colors (fill) selected manually (scale_fill_manual)
ggplot(filter(wells, Formtn_name %in% best_formations & Prod_year < 2021)) +
  geom_bar(aes(x = Formtn_name, fill=Formtn_name)) + 
  scale_fill_manual(values = c('#d0d1e6','#a6bddb','#67a9cf','#1c9099','#016c59')) + # taken e.g. from colorbrewer
  facet_wrap(~ Prod_year) + 
  theme(axis.text.x = element_blank(), # remove ticks #cosmetics
        axis.ticks.x = element_blank(),
        axis.title.x = element_blank())

Bonus: Interactive area charts!

Making interactive plots in R is extremely easy. First, we need to create standard “static” chart and then feed it to plotly.

library(plotly)

head(prod_df)
##   Water_prod_vol_m3 Prod_volume Prod_year Prod_ym         Prod_type Prod_days
## 1              91.4           0      2018 2018-09 Gas_prod_vol_e3m3      30.0
## 2            1291.0           0      2020 2020-03 Gas_prod_vol_e3m3      30.7
## 3            2528.0           0      2019 2019-01 Gas_prod_vol_e3m3      30.8
## 4            1068.0           0      2017 2017-09 Gas_prod_vol_e3m3      23.0
## 5            6948.7           0      2018 2018-05 Gas_prod_vol_e3m3      31.0
## 6             109.3           0      2020 2020-01 Gas_prod_vol_e3m3      31.0
##   Formtn_name
## 1  QUATERNARY
## 2  QUATERNARY
## 3  QUATERNARY
## 4  QUATERNARY
## 5  QUATERNARY
## 6  QUATERNARY
# let's filter the data first and summarize the production per period
prod_per_period <- prod_df %>% 
  filter(Formtn_name %in% best_formations) %>% 
  group_by(Prod_ym, Prod_type, Formtn_name) %>% 
  summarise(
    prod_per_period = sum(Prod_volume)
  )

# let's plot it according to the production type
p <- ggplot(prod_per_period, aes(x=Prod_ym, y=prod_per_period, group=Formtn_name, fill=Formtn_name, text = paste("Formation:", Formtn_name, "<br>Production vol:", prod_per_period))) + 
  geom_area(alpha=0.3) + 
  scale_x_discrete(breaks = c("2016-01", "2017-01","2018-01", "2019-01","2020-01", "2021-01")) +
  facet_grid(Prod_type ~ ., scales = "free")

p <- ggplotly(p, tooltip = "text")
p
# let's plot it according to the formation 
p <- ggplot(prod_per_period, aes(x=Prod_ym, y=prod_per_period, group=Prod_type, 
                                 text = paste("Formation:", Formtn_name, 
                                              "<br>Prod. type:", Prod_type), fill=Prod_type)) +
  geom_area(colour="#636363", alpha=0.3) + 
  scale_x_discrete(breaks = c("2016-01", "2017-01","2018-01", 
                              "2019-01","2020-01", "2021-01")) + 
  facet_grid(Formtn_name ~ ., scales = "free") + 
  ylab("") + 
  xlab("")

p <- ggplotly(p, tooltip = "text")
p

Visualize data on map!

Let’s move to another dataset provided by BCOGC to access the location of the wells and plot them together with geophysical lines, O&G pools and fields. O&G field => group of O&G pools

Here we will use geom_sf geometry and sf package, commonly used for geospatial data manipulation. We fill use following sf functions:

rm(list = ls(all=TRUE)) # clear global environment

sprintf("Current directory: %s", getwd())
## [1] "Current directory: C:/Users/Paulina-laptop/Desktop/R_programming/RLadies_workshop"
library(sf) # install.packages(sf) if you don't have it
# load dplyr and ggplot2 in case this is the only chunk you want to run
library(dplyr) 
library(ggplot2, warn.conflicts = FALSE) 
wells_coords <- read.csv(unzip("BCOGC_data/drill_csv.zip", "wells.csv"), skip = 1)  

head(wells_coords)#bigger file
##       Well.Surf.Loc                                               Well.Name
## 1 D- 080-H/092-G-03                    ROYAL   CITY NO. 1 D- 080-H/092-G-03
## 2 B- 050-D/082-G-01             CANADIAN   KOOTENAY NO. 1 B- 050-D/082-G-01
## 3 D- 055-A/082-G-02                   BORDER OILS   NO. 1 D- 055-A/082-G-02
## 4      13-11-081-17                           CANLIN   PINGEL  13-11-081-17
## 5 A- 065-E/093-I-16 CVE ENERGY ET AL  LONE MOUNTAIN NO. 1 A- 065-E/093-I-16
## 6 C- 048-D/103-G-05     STRATHCONA   QUEEN CHARLOTTE NO.1 C- 048-D/103-G-05
##   WA.Num Surf.Nad27.Lat Surf.Nad27.Long Surf.Nad83.Lat Surf.Nad83.Long
## 1      1       49084860       123064913       49085314       123065320
## 2      2       49020730       114294872       49035335       114497851
## 3      3       49025250       114331128       49047892       114554121
## 4      4             NA              NA       56004511       120331605
## 5      5       54530889       120254600       54530863       120255126
## 6      6       53172250       131590333       53171412       131575692
##   Surf.UTM.Zone.Num Surf.UTM83.Northng Surf.UTM83.Eastng Surf.North Surf.East
## 1                10            5443925          491629.8     -192.0     -71.3
## 2                11            5434400          682875.8         NA        NA
## 3                11            5435662          678718.6         NA        NA
## 4                10            6210173          652456.4     -201.2     221.3
## 5                10            6085099          664789.3      274.9    -285.3
## 6                 9            5908329          302312.1     -463.9    -107.9
##   Surf.Owner Ground.Elevtn Directional.Flag Surf.Ref.Corner Surf.Ref.Unit
## 1      CROWN           1.5                N              NE            80
## 2      CROWN            NA                N              NW            50
## 3      CROWN            NA                N              SW            55
## 4      CROWN         758.6                N              NW            NA
## 5      CROWN         971.6                N              SE            65
## 6      CROWN           8.3                N              NE            48
##   Surf.Ref.Block Surf.Ref.Map Surf.Ref.Lsd Surf.Ref.Sect Surf.Ref.Twp
## 1              H     092-G-03           NA            NA           NA
## 2              D     082-G-01           NA            NA           NA
## 3              A     082-G-02           NA            NA           NA
## 4                                       13            11           81
## 5              E     093-I-16           NA            NA           NA
## 6              D     103-G-05           NA            NA           NA
##   Surf.Ref.Range Surf.DLS.Exception Surf.Lsd Surf.Sect Surf.Twp Surf.Range
## 1             NA                          NA        NA       NA         NA
## 2             NA                          NA        NA       NA         NA
## 3             NA                          NA        NA       NA         NA
## 4             17                          13        11       81         17
## 5             NA                          NA        NA       NA         NA
## 6             NA                          NA        NA       NA         NA
##   Surf.Qtr.Unit Surf.NTS.Exception Surf.Unit Surf.Block Surf.Map Oper.Id
## 1             D                           80          H 092-G-03       0
## 2             B                           50          D 082-G-01       0
## 3             D                           55          A 082-G-02       0
## 4                                         NA                        1018
## 5             A                           65          E 093-I-16    1016
## 6             C                           48          D 103-G-05   11075
##   Oper.Abbrev Oper.Abbrev2 Optnl.Unit       Well.Area.Name Well.Name.Date
## 1       ROYAL                                   CITY NO. 1       19480610
## 2    CANADIAN                               KOOTENAY NO. 1       19490903
## 3 BORDER OILS                                        NO. 1       19480704
## 4      CANLIN                                       PINGEL       20171206
## 5  CVE ENERGY        ET AL             LONE MOUNTAIN NO. 1       20180607
## 6  STRATHCONA                         QUEEN CHARLOTTE NO.1       20200901
##   Special.Well.Class.Code Test.Hole
## 1                      NO         N
## 2                      NO         N
## 3                                 N
## 4                      NO         N
## 5                      NO         N
## 6                      NO         N
names(wells_coords)
##  [1] "Well.Surf.Loc"           "Well.Name"              
##  [3] "WA.Num"                  "Surf.Nad27.Lat"         
##  [5] "Surf.Nad27.Long"         "Surf.Nad83.Lat"         
##  [7] "Surf.Nad83.Long"         "Surf.UTM.Zone.Num"      
##  [9] "Surf.UTM83.Northng"      "Surf.UTM83.Eastng"      
## [11] "Surf.North"              "Surf.East"              
## [13] "Surf.Owner"              "Ground.Elevtn"          
## [15] "Directional.Flag"        "Surf.Ref.Corner"        
## [17] "Surf.Ref.Unit"           "Surf.Ref.Block"         
## [19] "Surf.Ref.Map"            "Surf.Ref.Lsd"           
## [21] "Surf.Ref.Sect"           "Surf.Ref.Twp"           
## [23] "Surf.Ref.Range"          "Surf.DLS.Exception"     
## [25] "Surf.Lsd"                "Surf.Sect"              
## [27] "Surf.Twp"                "Surf.Range"             
## [29] "Surf.Qtr.Unit"           "Surf.NTS.Exception"     
## [31] "Surf.Unit"               "Surf.Block"             
## [33] "Surf.Map"                "Oper.Id"                
## [35] "Oper.Abbrev"             "Oper.Abbrev2"           
## [37] "Optnl.Unit"              "Well.Area.Name"         
## [39] "Well.Name.Date"          "Special.Well.Class.Code"
## [41] "Test.Hole"
# subset dataframe
wells_coords <- wells_coords %>% select(Surf.UTM83.Northng, Surf.UTM83.Eastng, Surf.UTM.Zone.Num, Well.Area.Name, Oper.Abbrev, Surf.Owner)

# select only wells from the UNT10 and UTM11 zones with not-missing coordinates (complete.cases(.))
wells_coords <- wells_coords %>% filter(complete.cases(.), Surf.UTM.Zone.Num %in% c(10,11))
unique(wells_coords$Surf.UTM.Zone.Num)
## [1] 10 11
# create 2 geodataframes with crs values according to UTM zone
wells_pts_1 <- wells_coords %>% 
  filter(Surf.UTM.Zone.Num == 10) %>% 
  st_as_sf(coords = c("Surf.UTM83.Eastng", "Surf.UTM83.Northng"), crs = 26910) %>% 
  st_transform(4326) # want to use latlon coords

wells_pts_2 <- wells_coords %>% 
  filter(Surf.UTM.Zone.Num == 11) %>%  
  st_as_sf(coords = c("Surf.UTM83.Eastng", "Surf.UTM83.Northng"), crs = 26910) %>%
  st_transform(4326) # want to use latlon coords

# bind dataframe rows vertically and check the coordinate system
wells_pts <- rbind(wells_pts_1, wells_pts_2)
st_crs(wells_pts)
## Coordinate Reference System:
##   User input: EPSG:4326 
##   wkt:
## GEOGCRS["WGS 84",
##     DATUM["World Geodetic System 1984",
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     USAGE[
##         SCOPE["Horizontal component of 3D system."],
##         AREA["World."],
##         BBOX[-90,-180,90,180]],
##     ID["EPSG",4326]]
# now let's load geoophysical lines
geoph_lines <- st_read("BCOGC_data/shapefiles/PASR_GEOPHYSICAL_LN_subset.shp")
## Reading layer `PASR_GEOPHYSICAL_LN_subset' from data source `C:\Users\Paulina-laptop\Desktop\R_programming\RLadies_workshop\BCOGC_data\shapefiles\PASR_GEOPHYSICAL_LN_subset.shp' using driver `ESRI Shapefile'
## Simple feature collection with 49710 features and 19 fields
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: 1266576 ymin: 1173555 xmax: 1374678 ymax: 1265680
## Projected CRS: NAD83 / BC Albers
# we will focus only on the lines within Dawson Area
geoph_lines_dawson <- filter(geoph_lines, PROG_NAME %in% c("DAWSON 3D","DAWSON 3D EXTENSION"))

# let's set the same crs as the wells
geoph_lines_dawson <- st_transform(geoph_lines_dawson, st_crs(wells_pts))

# merge the lines and create the polygon to filter the wells
dawson_area <- st_union(geoph_lines_dawson) %>% 
  st_convex_hull()
## although coordinates are longitude/latitude, st_union assumes that they are planar
# filter the wells
wells_dawson <- wells_pts %>%
  filter(st_within(x = ., y = dawson_area, sparse = FALSE))
## although coordinates are longitude/latitude, st_within assumes that they are planar
# plot together
ggplot() + 
  geom_sf(data = wells_dawson, size=1, alpha=0.2, colour='blue') + 
  geom_sf(data = geoph_lines_dawson, size=0.1)

# let's add O&G pools
pools <- st_read("BCOGC_data/shapefiles/POOL_CONTOURS_LN.shp")
## Reading layer `POOL_CONTOURS_LN' from data source `C:\Users\Paulina-laptop\Desktop\R_programming\RLadies_workshop\BCOGC_data\shapefiles\POOL_CONTOURS_LN.shp' using driver `ESRI Shapefile'
## Simple feature collection with 7963 features and 12 fields
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: 1091006 ymin: 1070402 xmax: 1386930 ymax: 1679933
## Projected CRS: NAD83 / BC Albers
# get the production pools intersecting the Dawson area only
pools <- st_transform(pools, st_crs(wells_dawson))
pools_dawson <- pools %>%
  filter(st_intersects(x = ., y = dawson_area, sparse = FALSE))
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
# plot it
ggplot() + 
  geom_sf(data = wells_dawson, alpha=0.2) + 
  geom_sf(data = geoph_lines_dawson, alpha=0.2) + 
  geom_sf(data = pools_dawson, aes(colour=FLUID_TYPE)) +
  scale_color_discrete(name = "Producing pools") +
  xlab("Longitude") + 
  ylab("Latitude")

# Let's add one more information:O&G fields. Transform to the crs of the wells
fields <- st_read("BCOGC_data/shapefiles/FIELDS_PY.shp")
## Reading layer `FIELDS_PY' from data source `C:\Users\Paulina-laptop\Desktop\R_programming\RLadies_workshop\BCOGC_data\shapefiles\FIELDS_PY.shp' using driver `ESRI Shapefile'
## Simple feature collection with 570 features and 5 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 1079392 ymin: 1066850 xmax: 1387498 ymax: 1680540
## Projected CRS: NAD83 / BC Albers
ggplot() + 
  geom_sf(data = fields)

fields <- st_transform(fields, st_crs(wells_dawson))

# let's focus on the fields in the smaller area (crop the layer)
fields <- st_crop(fields, xmin = -121, xmax = -120, ymin = 55.7, ymax = 56)
## although coordinates are longitude/latitude, st_intersection assumes that they are planar
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
unique(fields$FLDRNM)
##  [1] "MICA"           "PARKLAND"       "DOE"            "DAWSON CREEK"  
##  [5] "GROUNDBIRCH"    "SATURN"         "SUNSET PRAIRIE" "BRIAR RIDGE"   
##  [9] "BRASSEY"        "TOWER LAKE"     "SUNDOWN"        "SUNRISE"
# let's visualize only Dawson field 
field_dawson <- fields %>% filter(FLDRNM == "DAWSON CREEK")

# let's plot it together 
ggplot() + 
  geom_sf(data = wells_dawson, alpha=0.2) +
  geom_sf(data = geoph_lines_dawson, alpha=0.2) +
  geom_sf(data = pools, aes(colour=FLUID_TYPE)) +
  geom_sf(data = field_dawson, fill=NA, linetype="dashed", colour="blue", size=0.5) +
  scale_color_discrete(name = "Producing pools") +
  coord_sf(crs = 4326, xlim = c(-120.5,-120), ylim = c(55.7, 56)) + 
  xlab("Longitude") + 
  ylab("Latitude")